# Analysis of Real World Countries

rm(list = ls())

# print date and time of run
print("Starting date and time")
Sys.time()

# load packages and functions
R_folder <- "INSERT HERE"
source(paste0(R_folder, "survey_functions.R"))

# display the output
output_print <- TRUE
# return the main results
return_main <- TRUE

# images directory
images_directory <- "INSERT HERE"

# grayscale for the graphics or not
#print_colormodel <- "srgb" # in color
print_colormodel <- "grey"

# load data
data_folder <- "INSERT HERE"

################################
# Part 1: Real World Data

# load the data sets 

qog <- read.dta(paste0(data_folder, "qog_bas_ts_jan15.dta"))
qog <- qog[qog$year==2008,]
##############################
# Region Question
levels(qog$ht_region)
ee <- ifelse(qog$ht_region==levels(qog$ht_region)[1], 1, 0)
la <- ifelse(qog$ht_region==levels(qog$ht_region)[2], 1, 0)
na <- ifelse(qog$ht_region==levels(qog$ht_region)[3], 1, 0)
sa <- ifelse(qog$ht_region==levels(qog$ht_region)[4], 1, 0)
we <- ifelse(qog$ht_region==levels(qog$ht_region)[5], 1, 0)
ea <- ifelse(qog$ht_region==levels(qog$ht_region)[6], 1, 0)
se <- ifelse(qog$ht_region==levels(qog$ht_region)[7], 1, 0)
so <- ifelse(qog$ht_region==levels(qog$ht_region)[8], 1, 0)
pa <- ifelse(qog$ht_region==levels(qog$ht_region)[9], 1, 0)
ca <- ifelse(qog$ht_region==levels(qog$ht_region)[10], 1, 0)
dem <- ifelse(qog$chga_dem=="1. Democracy", 1, 0)
muslim <- qog$lp_muslim80
m_res <- data.frame(regions = c("All Countries", levels(qog$ht_region)),
           correlation = NA)
m_res[1,2] <- round(cor(dem, muslim, use = "complete.obs"), 3)
for (i in 2:nrow(m_res)) {
  m_res[i,2] <- suppressWarnings(cor(dem[qog$ht_region==levels(qog$ht_region)[i-1]],
      muslim[qog$ht_region==levels(qog$ht_region)[i-1]],
                              use = "complete.obs") %>% round(digits = 3))
}
# Supplementary Appendix Table 5
stargazer(m_res, summary = FALSE, rownames = FALSE)

regions <- data.frame(ee, la, na, sa, we, ea, se, so, pa, ca, dem)
regions <- regions[is.na(qog$chga_dem)==FALSE,]
freq_dem <- table(qog$ht_region, dem)
perc_dem <- data.frame(regions = names(freq_dem[,2]/(freq_dem[,1]+freq_dem[,2])),
                       perc = 100*freq_dem[,2]/(freq_dem[,1]+freq_dem[,2]))
perc_dem$regions <- as.character(gsub(pattern = "[1-9]. |10. ", 
                                      replacement = "", perc_dem$regions))
perc_dem$regions[perc_dem$regions == "Western Europe and North America"] <- "Western Europe"
perc_dem[11,1] <- "North America"
perc_dem[11,2] <- as.numeric(100)
perc_dem$regions <- factor(perc_dem$regions, levels = perc_dem$regions[order(perc_dem$perc)])
muslim <- qog$lp_muslim80

# Percent Muslim (1980) by Democracy
robustse(model = lm(muslim ~ dem))
cor(muslim, dem, use = "complete.obs")
# Percent Muslim (1980) by Democracy (North Africa and Middle East Only)
robustse(model = lm(muslim[na == 1] ~ dem[na == 1]))
cor(muslim[na == 1], dem[na == 1], use = "complete.obs")


ggplot(data = perc_dem, mapping = aes(x = regions, y = perc)) + 
  geom_bar(stat="identity") + coord_flip() + 
  ylab("Percent of Countries That Are Democracies") +
  xlab("Regions") + theme_bw()
ggsave(paste0(images_directory, "Supplementary_Appendix_Figure_2.pdf"), width = 7, height = 3)
diff.func <- function(data.set=vars, col.num){
  svar <- data.set[,col.num]
  dem <- regions$dem
  missing <- sum(is.na(svar))/length(svar)
  print(missing)
  if (missing!=1){
    return(c(coeftest(lm(svar~dem))[2,], missing))
  } else{
    return(rep(NA, 5))
  }
}

res <- matrix(NA, ncol(regions)-1, 5)
for (i in 1:(ncol(regions)-1)){
  res[i,] <- diff.func(data.set=regions, col.num=i)
}
res <- data.frame(res)
names(res) <- c("coef", "se", "t", "p.val", "missing")
res$var <- names(regions)[1:ncol(regions)-1]
res$lab <- levels(qog$ht_region)
res <- res[order(-abs(res$coef), res$p.val),]
res$p.val <- round(res$p.val, 3)
res <- res[,c(7,1,2)]
res[,2:3] <- round(res[,2:3], 3)
stargazer(res, summary = F, rownames=F)

# Quality of Government
qog <- read.dta(paste0(data_folder, "qog_bas_ts_jan15.dta"))
D <- data.frame(qog$ccodecow[qog$year==2008], qog$chga_dem[qog$year==2008])
names(D) <- c("ccodecow", "dem")
D$dem <- ifelse(D$dem=="1. Democracy", 1, 0)
qog <- qog[qog$year==1998,]
###################################
# Merge in Other Data: alliance
alliance <- read.dta(paste0(data_folder, "alliance_v4.1_by_dyad_yearly.dta"))
alliance <- alliance[alliance$state_name1=="United States of America" & 
                       alliance$year==1998,]
alliance$ccodecow <- alliance$ccode2
alliance <- alliance[,c(14:17,20)]
alliance <- aggregate(alliance, by=list(alliance$ccodecow), FUN=sum, na.rm=TRUE)
qog <- merge(x=qog, y=alliance, all.x = T, by="ccodecow")
qog$defense[is.na(qog$defense)] <- 0
qog$neutrality[is.na(qog$neutrality)] <- 0
qog$nonaggression[is.na(qog$nonaggression)] <- 0
qog$entente[is.na(qog$entente)] <- 0
qog$anytreaty <- ifelse(qog$defense+qog$neutrality+
                          qog$nonaggression+qog$entente>0, 1, 0)
###################################
# Merge in Other Data: trade
trade <- read.csv(paste0(data_folder, "dyadic_trade_3.0.csv"))
trade <- trade[trade$importer1=="United States of America" & trade$year==1998,]
trade$ccodecow <- trade$ccode2
trade <- trade[,c(6,7,15)]
trade <- read.csv(paste0(data_folder, "dyadic_trade_3.0.csv"))
trade <- trade[trade$importer1=="United States of America" & trade$year==2005,]
trade$ccodecow <- trade$ccode2
net <- trade$flow1 + trade$flow2
quantile(x = net[net>=0], probs = seq(0,1,0.2))
median(net[net>10000])
qog <- merge(x=qog, y=trade, all.x = T, by = "ccodecow")
qog$nettrade <- qog$flow1+qog$flow2
##################################
# Merge in Other Data: Joint Military Exercise
jme <- read.csv(paste0(data_folder, "jme_v1.1.csv"), stringsAsFactors=FALSE)
jme <- jme[jme$s.year==1998,]
jme$keep <- c()
for (i in 1:nrow(jme)){
  linei <- jme[i,]
  jme$keep[i] <- ifelse(sum(grepl(pattern = "USA", linei))>0,1,0)
}
jme <- jme[jme$keep==1,c(15:30,34:46)]
bag <- c()
for (i in 1:ncol(jme)){
  bag <- c(bag, jme[,i])
}
jme <- data.frame(table(bag))
jme <- jme[jme$bag!="" & jme$bag!="USA",]
names(jme) <- c("ccodealp", "joint.me")
qog <- merge(x=qog, y=jme, all.x = T, by="ccodealp")
qog$joint.me[is.na(qog$joint.me)] <- 0
##################################
# Merge in Other Data: Foreign Direct Investment data
fdi <- read.csv(paste0(data_folder, "fdi_data.csv"))
names(fdi)[1] <- "cname"
qog <- merge(x=qog, y=fdi, all.x= T, by="cname")
##################################
# Merge in Other Data: Capability
cap <- read.csv(paste0(data_folder, "NMC_v4_0.csv"), stringsAsFactors=FALSE)
cap <- cap[cap$year==1998,]
cap$ccodecow <- cap$ccode
cap <- read.csv(paste0(data_folder, "NMC_v4_0.csv"), stringsAsFactors=FALSE)
cap <- cap[cap$year==2005,]
quantile(x = cap$milex[cap$milex>=0]*1000/(1*10^6), probs = seq(0,1,0.2))
cap_val <- cap$milex[cap$milex>=0]*1000/(1*10^6)
median(cap_val[cap_val > 3500])
median(x = cap$milex[cap$milex>=0]*1000/(1*10^6)[cap$milex[cap$milex>=0]*1000/(1*10^6)>3500])
cap$ccodecow <- cap$ccode
qog <- merge(x=qog, y=cap, all.x=T, by="ccodecow")
#################################
# Merge in Other Data: Majority White
white <- read.dta(paste0(data_folder, "majority_white.dta"))
white <- white[,c(1,3)]
names(white)[2] <- "ccodecow"
white$ccodecow <- as.integer(white$ccodecow)
qog <- merge(x = qog, y = white, all.x = T, by = "ccodecow")
qog <- qog[!duplicated(qog$cname),]
##################################
# combine the datasets
test <- merge(x=qog, y=D, by = "ccodecow")
test <- test[is.na(test$dem)==FALSE,]
test <- test[test$cname!="United States",]
vars <- dplyr::select(test, ajr_settmort, 
                        al_ethnic, 
                        al_language,
                        al_religion,
                        bl_asy25f,
                        bl_asy25mf,
                        dr_eg,
                        dr_sg,
                        el_avelf,
                        epi_epi,
                        fe_etfra, 
                        fi_index,
                        fi_index_cl,
                        gle_cgdpc,
                        gle_rgdpc, 
                        hf_efiscore,
                        hf_prights,
                        hf_trade,
                        ht_colonial,
                        imf_exp,
                        imf_gd,
                        imf_gdpgr,
                        imf_pop,
                        imf_rev,
                        imf_ue,
                        lp_catho80,
                        lp_muslim80,
                        lp_protmg80,
                        ross_gas_netexpc,
                        ross_gas_prod,
                        ross_gas_value,
                        ross_oil_netexpc,
                        ross_oil_prod,
                        ross_oil_value,
                        ucdp_type1,
                        ucdp_type3,
                        ucdp_type4, 
                        une_durce,
                        une_durp,
                        une_durpp,
                        une_durs,
                        une_gerppt,
                        une_gerpt,
                        une_gerst,
                        une_hiv,
                        une_imr,
                        une_leb,
                        une_popgr,
                        une_rp,
                        unna_ahff,
                        unna_con,
                        unna_gdp,
                        unna_gse,
                        unna_gsi,
                        unna_man,
                        unna_mmu,
                        unna_pop,
                        unna_tsc,
                        unna_wrrh,
                        wdi_agrvagdp,
                        wdi_altnucen,
                        wdi_armedf,
                        wdi_armedfper,
                        wdi_broadband,
                        wdi_co2mtpc,
                        wdi_dofdcal,
                        wdi_elpowconpc,
                        wdi_emp,
                        wdi_empagr,
                        wdi_empind,
                        wdi_empser,
                        wdi_energyimp,
                        wdi_enusektoepc,
                        wdi_expfuel,
                        wdi_expmilgdp,
                        wdi_forestarea,
                        wdi_gdpgr,
                        wdi_hetot,
                        wdi_indvagdp,
                        wdi_internetuse,
                        wdi_landagr,
                        wdi_landarea,
                        wdi_mobile,
                        wdi_mortuf, 
                        wdi_pop014,
                        wdi_pop1564,
                        wdi_pop65,
                        wdi_poprurper,
                        wdi_popurbper,
                        wdi_refasylum,
                        wdi_reforigin,
                        wdi_scitecjournal,
                        wdi_servagdp,
                        wdi_taxrev,
                        wdi_trade,
                        wdi_unempfilo,
                        wdi_unempilo, 
                        wdi_unempmilo,
                        wdi_unempyfilo,
                        wdi_unempyilo,
                        wdi_unempymilo,
                   majority_white,
                   irst,
                   milex, 
                   milper,
                   pec,
                   tpop,
                   upop,
                   cinc,
                   defense,
                   neutrality,
                   nonaggression,
                   entente,
                   anytreaty,
                   flow1,
                   flow2,
                   nettrade,
                   joint.me,
                   transactions1998,
                   income1998,
                   position1998,
                   dem)
vars$ht_colonial <- ifelse(vars$ht_colonial=="5. British", 0, 1)
cor(vars$dem, vars$lp_muslim80, use = "complete.obs")
diff.func <- function(data.set=vars, col.num){
  var <- data.set[,col.num]
  svar <- c(scale(x = var, center = T, scale = T))
  dem <- data.set$dem
  missing <- sum(is.na(svar))/length(svar)
  print(missing)
  if (missing!=1){
    return(c(robustse(lm(svar ~ dem))[2,], missing))
  } else{
    return(rep(NA, 5))
  }
}

res <- matrix(NA, ncol(vars)-1, 5)
for (i in 1:(ncol(vars)-1)){
  res[i,] <- diff.func(col.num=i)
}
res <- data.frame(res)
names(res) <- c("coef", "se", "t", "p.val", "missing")
res$var <- names(vars)[1:ncol(vars)-1]

# Tomz and Weeks variables
tw <- na.omit(res[103:nrow(res),])
tw$p.val <- round(tw$p.val, 3)
tw$lab <- c("Iron and Steel Production (Thousands of Tons)",
            "Military Expenditures (Thousands of $)",
            "Military Personnel (Thousands)",
            "Energy Consumption (Thousands of Coal-Ton Equivalents)",
            "Total Population (Thousands)",
            "Urban Population (Thousands)",
            "Composite Index of National Capability Score",
            "Number of Treaties: Defense",
            "Number of Treaties: Non-aggression",
            "Number of Treaties: Entente",
            "Number of Military Treaties (All Types)",
            "Volume of Imports",
            "Volume of Exports",
            "Total Volume of Trade (Imports + Exports)",
            "Number of Joint Military Exercises",
            "FDI: Position on a Historical-Cost Basis",
            "FDI: Net Financial Transactions",
            "FDI: Net Income")
tw <- tw[c(7, 1, 2, 5)]
tw[,c(2,3)] <- round(tw[,c(2,3)], 3)
tw$missing <- round(tw$missing*100)
stargazer(tw, summary=FALSE, rownames=FALSE)

res <- res[order(-abs(res$coef), res$p.val),]
top25 <- res[1:25,]
top25$vardes <- c(
  "Fuel exports (% of merchandise exports)",
  "Muslims as percentage of population in 1980",
  "Employment in agriculture (% of total employment)",
  "Population ages 65 and above (% of total)",
  "Hertiage Foundation Economic Freedom Index: Property Rights",
  "Hertiage Foundation Economic Freedom Index",
  "Employment in services (% of total employment)",
  "Number of Military Treaties",
  "Number of Treaties: Defense",
  "Gross enrollment ratio, pre-primary schools, total.",
  "Number of Treaties: Entente ",
  "Population ages 0-14 (% of total)",
  "Number of Treaties: Non-aggression",
  "Catholics as percentage of population in 1980",
  "Social Globalization Index",
  "Hertiage Foundation Economic Freedom Index: Trade Freedom",
  "Alternative and nuclear energy (% of total energy use)",
  "Energy imports, net (% of energy use)",
  "Country's population is majority white",
  "Services, etc., value added (% of GDP)",
  "Health expenditure, total (% of GDP)",
  "Employment in industry (% of total employment)",
  "Mortality rate, under-5 (per 1,000 live births)",
  "Average Value of Ethnolinguistic Fractionalization",
  "Armed forces personnel (% of total labor force)")
top25[,1:5] <- round(top25[,1:5],3)
top25 <- top25[,c(7,1:2,5)]
names(top25) <- c("variable_description", "coef", "SE", "percent_missing")
top25$percent_missing <- round(top25$percent_missing*100)
# Supplementary Appendix Table 3
stargazer(top25, summary = FALSE, rownames = F)
